home *** CD-ROM | disk | FTP | other *** search
Text File | 1992-01-01 | 2.8 KB | 129 lines | [TEXT/R*ch] |
- # Module 'parser'
- #
- # Parse S-expressions output by the Panel Editor
- # (which is written in Scheme so it can't help writing S-expressions).
- #
- # See notes at end of file.
-
-
# Character classes used by the tokenizer.  They are plain strings so a
# single character can be classified with the 'in' operator.
whitespace = ' \t\n'    # characters that separate tokens and are dropped
operators = '()\''      # each of these characters is a token by itself
# Characters that end a symbol token: the operators, whitespace, the
# comment introducer (;) and the string delimiter (").
separators = operators + whitespace + ';' + '"'
-
-
# Tokenize a string.
#
# Argument: s, the text to split; it may span several lines.
# Return value: a list of tokens (strings) in input order:
#   - each operator character is a token by itself;
#   - a string literal is kept verbatim, including its double quotes
#     and any backslashes;
#   - any other run of characters up to the next separator is one token.
# Whitespace and comments (from ';' to the end of the line) yield no tokens.
# An unterminated string literal is not an error: it runs to the end of s.
#
def tokenize_string(s):
    tokens = []
    n = len(s)
    i = 0
    # Scan with an index instead of repeatedly slicing the front off s,
    # which would copy the remaining text once per token.
    while i < n:
        c = s[i]
        if c in whitespace:
            i = i + 1
        elif c == ';':
            # A comment ends at the end of its own line, so text on the
            # following lines of a multi-line string is still tokenized.
            eol = s.find('\n', i)
            if eol < 0:
                break
            i = eol + 1
        elif c == '"':
            start = i
            i = i + 1
            while i < n:
                c = s[i]
                i = i + 1
                if c == '"':
                    break
                if c == '\\':
                    # Skip the escaped character so that \" does not
                    # terminate the string.
                    i = i + 1
            # i may be n+1 after a trailing backslash; slicing clamps it.
            tokens.append(s[start:i])
        elif c in operators:
            tokens.append(c)
            i = i + 1
        else:
            start = i
            i = i + 1
            while i < n and s[i] not in separators:
                i = i + 1
            tokens.append(s[start:i])
    return tokens
-
-
# Tokenize a whole file (given as file object, not as file name).
#
# Argument: fp, an object whose readline() method returns an empty
# (false) value at end of file.
# Return value: a list of tokens (strings) for all lines, in file order.
# Each line is passed to tokenize_string() separately.
#
def tokenize_file(fp):
    tokens = []
    while True:
        line = fp.readline()
        if not line:
            break
        # extend() grows the list in place; 'tokens = tokens + ...' would
        # copy every token collected so far once per line.
        tokens.extend(tokenize_string(line))
    return tokens
-
-
# Exception raised by parse_expr.
# It is a class because only instances of BaseException subclasses can
# be raised; the lower-case name is kept so that existing
# 'except syntax_error' clauses continue to work.
# The exception argument is a short message describing the problem.
#
class syntax_error(Exception):
    pass


# Parse an S-expression.
#
# Argument: tokens, a list of tokens as returned by tokenize_*().
# Return value: a pair (expr, tokens)
# where expr is a list representing the s-expression (symbols and strings
# stay token strings, nested expressions become nested lists),
# and tokens contains the remaining tokens.
# Raises syntax_error if the first token is not "(" or if the tokens run
# out before the matching ")" is found.
#
def parse_expr(tokens):
    if (not tokens) or tokens[0] != '(':
        raise syntax_error('expected "("')
    # Walk the tokens with an index and keep the enclosing, still-open
    # lists on an explicit stack.  This avoids copying the token list
    # after every token and does not recurse, so deeply nested input
    # cannot exhaust the interpreter's recursion limit.
    enclosing = []
    expr = []
    pos = 1
    n = len(tokens)
    while pos < n:
        token = tokens[pos]
        pos = pos + 1
        if token == '(':
            enclosing.append(expr)
            expr = []
        elif token == ')':
            if not enclosing:
                # This closes the outermost list: done.
                return expr, tokens[pos:]
            # Attach the finished sub-list to its parent and resume there.
            parent = enclosing.pop()
            parent.append(expr)
            expr = parent
        else:
            expr.append(token)
    raise syntax_error('missing ")"')
-
-
# Parse a file (given as file object, not as file name).
#
# The file is tokenized with tokenize_file() and parse_expr() is then
# applied repeatedly until no tokens are left.
# Return value: a list of the parsed S-expressions found at the top level,
# in the order in which they appear.
#
def parse_file(fp):
    remaining = tokenize_file(fp)
    toplevel = []
    while True:
        if not remaining:
            return toplevel
        # parse_expr() hands back the tokens it did not consume.
        parsed, remaining = parse_expr(remaining)
        toplevel.append(parsed)
-
-
- # EXAMPLE:
- #
- # The input
- # '(hip (hop hur-ray))'
- #
- # passed to tokenize_string() returns the token list
- # ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
- #
- # When this is passed to parse_expr() it returns the expression
- # ['hip', ['hop', 'hur-ray']]
- # plus an empty token list (because there are no tokens left).
- #
- # When a file containing the example is passed to parse_file() it returns
- # a list whose only element is the output of parse_expr() above:
- # [['hip', ['hop', 'hur-ray']]]
-
-
- # TOKENIZING:
- #
- # Comments start with semicolon (;) and continue till the end of the line.
- #
- # Tokens are separated by whitespace, except the following characters
- # always form a separate token (outside strings):
- # ( ) '
- # Strings are enclosed in double quotes (") and backslash (\) is used
- # as escape character in strings.
-